/**************************************
Author: Kevin Stange
Date: 06/24/2021
Update: 6/24/2021
Update: 9/28/2021 included new disclosed results
Update: 5/8/2022 included new disclosed results
Update: 1/31/2024 include more states
Update: 2/19/2024 Combined do-files
Update: 7/9/2025 Re-annotated
*************************************/
* Session setup: widen the output line width, pin the interpreter to Stata 16
* behaviour, turn off the more-pause, and use the s1mono graph scheme.
* The version statement matters for the table commands further down, which
* use the contents() syntax that predates the Stata 17 rewrite of table.
set linesize 200
version 16
set more off 
set scheme s1mono 
* NOTE(review): config.do is not loaded here, so the globals datadir, graphdir
* and date used below are presumably defined by whatever calls this do-file;
* confirm before running it stand-alone.
*include config.do


/* Analysis of Top School Bias */

* Load the cell-level estimates. The path uses forward slashes, which Stata
* accepts on every platform and which match the graph export path used later.
import delimited "$datadir/20230920/bias_scatter_barrons_byyear_2023-09-08.csv", clear

* Attach two-letter abbreviations to the numeric state codes
* (the codes look like state FIPS codes; confirm against the data source).
label define statel 8 "CO" 36 "NY" 39 "OH" 42 "PA" 48 "TX" 13 "GA" 27 "MN" 29 "MO" 49 "UT" 51 "VA"
label values state statel

* Cohort-by-state cell counts: unweighted, then frequency-weighted by out_n
* and by missing_n.
tabulate grad_year state
tabulate grad_year state [fw = out_n]
tabulate grad_year state [fw = missing_n]

* Need to drop 2001 and 2002 cohorts in PA. They are really small for some reason.
* Nothing is dropped here; later steps restrict with "if grad_year >= 2003" instead.

* Bias is the out coefficient minus the in coefficient for each cell.
gen bias = beta_out - beta_in

**** TABLE A5 *****************
* Weighted means by year_post (rows) and state (columns), with row and column
* totals. Bias is weighted by out_n; missing_diff is weighted by missing_n.
* These commands rely on the pre-17 table syntax (see the version statement).

* Panel 1: every cohort
table year_post state [aweight = out_n], contents(mean bias) row col
table year_post state [aweight = missing_n], contents(mean missing_diff) row col

* Panel 2: 2003 cohorts onward only, because 2001 and 2002 are weird in PA and NY
table year_post state if grad_year >= 2003 [aweight = out_n], contents(mean bias) row col
table year_post state if grad_year >= 2003 [aweight = missing_n], contents(mean missing_diff) row col

/********************************************
Creating T-statistics
********************************************/

* Standard error of the difference beta_out - beta_in. The formula adds the two
* variances and includes no covariance term.
gen se_bias = sqrt(se_in^2 + se_out^2)

gen t_stat = bias/se_bias

* Indicator for |t| above the 1.96 critical value. Stata treats a missing value
* as larger than any number, so without the if-qualifier a cell with a missing
* t_stat would evaluate abs(.) > 1.96 as true and be flagged significant.
* With the qualifier, sig is left missing whenever t_stat is missing.
gen sig = (abs(t_stat) > 1.96) if !missing(t_stat)

* Bias for significant cells only; missing elsewhere.
gen bias_sig = bias if sig == 1

* Include the missing category so undefined t-statistics are visible.
tab sig, m
#delimit ;

/* Scatter of bias against missing_diff for the 2003 and later cohorts.
   Each state is its own series with a hollow marker and its own colour;
   the legend labels follow the order in which the series are listed.
   The semicolon delimiter remains in effect after this block. */
twoway
	(scatter bias missing_diff if grad_year >= 2003 & state == 8,  msymbol(circle_hollow)     mcolor(green))
	(scatter bias missing_diff if grad_year >= 2003 & state == 36, msymbol(diamond_hollow)    mcolor(blue))
	(scatter bias missing_diff if grad_year >= 2003 & state == 39, msymbol(triangle_hollow)   mcolor(pink))
	(scatter bias missing_diff if grad_year >= 2003 & state == 42, msymbol(square_hollow)     mcolor(gs4))
	(scatter bias missing_diff if grad_year >= 2003 & state == 48, msymbol(smcircle_hollow)   mcolor(red))
	(scatter bias missing_diff if grad_year >= 2003 & state == 13, msymbol(smdiamond_hollow)  mcolor(orange))
	(scatter bias missing_diff if grad_year >= 2003 & state == 27, msymbol(smtriangle_hollow) mcolor(lime))
	(scatter bias missing_diff if grad_year >= 2003 & state == 29, msymbol(smsquare_hollow)   mcolor(cranberry))
	(scatter bias missing_diff if grad_year >= 2003 & state == 51, msymbol(diamond_hollow)    mcolor(gold) msize(medlarge)),
	ytitle("Bias")
	xtitle("Difference in Rate of No In-State Earnings")
	legend(
		label(1 "CO") label(2 "NY") label(3 "OH") label(4 "PA") label(5 "TX")
		label(6 "GA") label(7 "MN") label(8 "MO") label(9 "VA")
		rows(2)
	) ;

/* Save the figure as PNG under the graph directory, stamped with the date global. */
graph export "$graphdir/betas_by_state_scatter_$date.png", replace ;


#delimit cr
* The graph section above leaves the semicolon delimiter switched on. In that
* mode a star comment only ends at the next semicolon, so the banner below used
* to swallow the clear, import and gen statements that follow it. Returning to
* carriage-return mode first makes those statements actually execute.

****************************************************************************************
* Code to produce Table 4: regressing cell-level bias on difference in missingness
****************************************************************************************

clear

* Reload the cell-level estimates from scratch and rebuild the bias variable.
* Forward slashes keep the path portable across platforms.
import delimited "$datadir/20230920/bias_scatter_barrons_byyear_2023-09-08.csv", clear
gen bias = beta_out - beta_in

#delimit ;

/* Close any postfile handle left open by an earlier aborted run, then open a
   fresh results file with one row per regression column. */
cap postclose bias ;
postfile bias column beta se obs rsq using "$graphdir/bias_regs.dta", replace ;

/* Column 1: bias on missing_diff only, robust standard errors */
local i = 1 ;
reg bias missing_diff , r ;
post bias (`i') (_b[missing_diff]) (_se[missing_diff]) (e(N)) (e(r2)) ;

/* Column 2: add grad_year indicators */
local i = `i' + 1 ;
reg bias missing_diff i.grad_year , r ;
post bias (`i') (_b[missing_diff]) (_se[missing_diff]) (e(N)) (e(r2)) ;

/* Column 3: add year_post indicators */
local i = `i' + 1 ;
reg bias missing_diff i.grad_year i.year_post , r ;
post bias (`i') (_b[missing_diff]) (_se[missing_diff]) (e(N)) (e(r2)) ;

/* Column 4: add state indicators */
local i = `i' + 1 ;
reg bias missing_diff i.grad_year i.year_post i.state , r ;
post bias (`i') (_b[missing_diff]) (_se[missing_diff]) (e(N)) (e(r2)) ;

postclose bias ;

/* Format the posted results for export. */
use "$graphdir/bias_regs.dta", clear ;

replace se   = round(se, 0.0001) ;
replace beta = round(beta, 0.0001) ;
replace rsq  = round(rsq, 0.001) ;

/* postfile stores these columns as float by default, so a rounded value is not
   exactly representable and the default tostring format can print stray
   trailing digits. An explicit four-decimal format keeps the string clean. */
tostring se, replace format(%9.4f) force ;

/* Wrap the standard error in parentheses, as in a regression table. */
replace se = "(" + se + ")" ;

/* NOTE(review): outsheet writes tab-separated text unless the comma option is
   given, so this .csv file is tab-delimited. Left unchanged in case a
   downstream reader depends on it; confirm and add comma if appropriate. */
outsheet using "$graphdir/bias_regs.csv", replace ;




	 
	 
